import scrapy
from bid.items import BidItem
from bid.tools import *


class GuizhouSpider(scrapy.Spider):
    """Crawl the paginated JSON search endpoint on ggzy.guizhou.gov.cn.

    Each listed entry is converted into a ``BidItem``, passed through
    ``get_field`` and yielded. After a page has been processed, the same
    URL with ``pageNumber`` incremented by one is requested, until a page
    comes back without entries.
    """

    name = 'guizhou'
    # Bare host name only: an entry with a trailing slash can never match a
    # request's hostname in Scrapy's offsite filtering.
    allowed_domains = ['ggzy.guizhou.gov.cn']
    start_urls = ['https://ggzy.guizhou.gov.cn/igs/front/search/list.html?pageNumber=1&pageSize=10&siteId=500483&index=trades&type=infomation_v6&filter%5BchannelId%5D=5376927%2C5377100%2C5377337%2C5377101%2C5377103%2C5377338%2C5237520%2C5237521%2C5617491%2C5617492%2C5617493%2C5237523&orderProperty=docRelTime&orderDirection=desc&isPage=true']

    def parse(self, response):
        """Yield one processed item per entry, then request the next page.

        Args:
            response: Response of the list endpoint; its body must be JSON
                with the entries stored under ``page -> content``.

        Yields:
            The value returned by ``get_field`` for every entry, followed by
            a ``scrapy.Request`` for the next page when this page contained
            at least one entry.
        """
        payload = response.json()
        entries = (payload.get('page') or {}).get('content') or []
        if not entries:
            # An empty page means there is nothing further to fetch. Without
            # this guard the spider would keep requesting ever-higher page
            # numbers forever.
            self.logger.info('No entries in %s; stopping pagination.', response.url)
            return

        pause_seconds = len(entries)
        for entry in entries:
            item = BidItem()
            item['link'] = f'https://ggzy.guizhou.gov.cn/jyxx/view.html?meteIds={entry["MetaDataId"]}'
            item['title'] = entry['docTitle']
            # Keep only the first 10 characters of the timestamp
            # (presumably the YYYY-MM-DD date part; confirm against the API).
            item['time'] = entry['crTime'][:10]
            item['classification'] = f'贵州-{entry["bulletinType"]}-{entry["inTypeName"]}'
            item['content'] = get_content(entry['docContent'], '')
            # get_field comes from bid.tools and receives a plain dict copy
            # of the item; its exact post-processing is not visible here.
            yield get_field(dict(item))
            # NOTE(review): time.sleep is a blocking call and stalls the
            # whole crawler process while it waits. It is kept so the crawl
            # pacing is unchanged; consider replacing it with Scrapy's
            # DOWNLOAD_DELAY setting.
            time.sleep(pause_seconds)

        # Bump only the first pageNumber parameter; every other query
        # parameter is left exactly as it was received.
        next_url, replaced = re.subn(
            r'pageNumber=(\d+)',
            lambda match: f'pageNumber={int(match.group(1)) + 1}',
            response.url,
            count=1,
        )
        if replaced:
            yield scrapy.Request(url=next_url, callback=self.parse, dont_filter=True)